Main Figures
Figures 3, 5 and 6 were constructed manually.
Figure 4: A-domains
Reconstructing the domains
# Inputs for Figure 4. `domains.species` and `dom.gene.lenght` are lists that
# are indexed in parallel below (elements 1-3, one per genome).
cols <- readRDS(file = "Fig4/Colors_arrow.Rds")
# NOTE(review): `cols.origin` is not used by the Figure 4 code below; it is
# still loaded in case later code relies on it.
cols.origin <- readRDS(file = "Fig4/Colors_arrow_origin.Rds")
domains.species <- readRDS("Fig4/domains_species.Rds")
dom.gene.lenght <- readRDS("Fig4/domains_gene_length.Rds")

# Draw the domain architecture of every NRPS gene of one genome.
#
# domains      data frame with columns start, finish, position, Class, Gene
#              (one row per domain).
# gene.lengths data frame with columns end, position, Gene (one row per gene);
#              drawn as a thin backbone from 0 to `end`.
# palette      named colour vector; only the entries whose names occur in
#              `domains$Class` are passed to the colour scale.
#
# Returns a ggplot object with one facet row per gene.
plot_nrps_domains <- function(domains, gene.lengths, palette) {
  ggplot(domains, aes(x = 1)) +
    # Gene backbone.
    geom_segment(
      data = gene.lengths,
      aes(x = 0, xend = end, y = position, yend = position)
    ) +
    # One closed arrow per domain, coloured by domain class.
    geom_segment(
      aes(x = start, xend = finish, y = position, yend = position, color = Class),
      arrow = arrow(length = unit(0.15, "cm"), type = "closed"),
      size = 2
    ) +
    theme_classic() +
    facet_grid(Gene ~ ., scales = "free") +
    xlab("Position") +
    ylab("NRPS gene") +
    scale_color_manual(
      values = palette[names(palette) %in% unique(domains$Class)]
    ) +
    theme(
      axis.text.y = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom",
      strip.text.y = element_text(angle = 0),
      axis.line.y = element_blank()
    ) +
    # "none" replaces the deprecated `guides(fill = FALSE)` spelling.
    guides(fill = "none")
}

bashet.nrps <- plot_nrps_domains(domains.species[[1]], dom.gene.lenght[[1]], cols)
basmer.jgi.nrps <- plot_nrps_domains(domains.species[[2]], dom.gene.lenght[[2]], cols)
basmer.b9252.nrps <- plot_nrps_domains(domains.species[[3]], dom.gene.lenght[[3]], cols)
# Plot
grid.arrange(bashet.nrps, basmer.jgi.nrps, basmer.b9252.nrps)

Figure 7: HGT counts
# Per-origin HGT counts; one column per genome (bashet, basmer_B9252,
# basmer_HGT) plus an `Origin` column.
HGT.counts <- read.table("Fig7/HGT_counts.txt", sep = "\t", header = TRUE)
# Fix the order of the origins; this order drives both the stacking order and
# the pairing with the manual fill colours below.
HGT.counts$Origin <- factor(
  HGT.counts$Origin,
  levels = c(
    "a-proteobacteria", "b-proteobacteria", "d-proteobacteria",
    "e-proteobacteria", "g-proteobacteria", "proteobacteria", "firmicutes",
    "actinobacteria", "high GC Gram+", "enterobacteria", "planctomycetes",
    "CFB group bacteria", "GNS bacteria", "verrucomicrobia", "fusobacteria",
    "cyanobacteria", "chlamydias", "mycoplasmas", "aquificales", "bacteria",
    "euryarchaeotes", "archaea"
  )
)
# Scale each genome's counts by a fixed denominator.
# NOTE(review): the denominators are presumably the total gene counts of the
# three genomes -- confirm. Because the bars use position = 'fill', each bar is
# rescaled to sum to 1, so this per-genome scaling does not alter the plot.
HGT.counts.prop <- HGT.counts
HGT.counts.prop$bashet <- HGT.counts$bashet / 9331
HGT.counts.prop$basmer_B9252 <- HGT.counts$basmer_B9252 / 13273
HGT.counts.prop$basmer_HGT <- HGT.counts$basmer_HGT / 16111
# Sort both tables by origin. The sorted tables were previously computed and
# discarded, and the second call ordered one table by a column of the other.
HGT.counts <- HGT.counts[order(HGT.counts$Origin), ]
HGT.counts.prop <- HGT.counts.prop[order(HGT.counts.prop$Origin), ]
# Long format: one row per (Origin, genome) pair.
HGT.counts.m <- melt(HGT.counts.prop)
# Plot
# theme_classic() is a complete theme, so the theme_bw() that used to precede
# it had no effect and has been dropped.
ggplot(HGT.counts.m, aes(x = variable, y = value, fill = Origin)) +
  geom_bar(stat = 'identity', position = 'fill') +
  scale_fill_manual(
    values = c(
      "#FFAAAA", "#E37B7B", "#D46A6A", "#801515", "#550000", "#2B0000",
      "#ED8229", "#03D5D5", "#198E8E", "#674A33", "#E9038F", "#FFFF04",
      "#ED5DBA", "#72335B", "#7F0855", "#08AF13", "#AA5704", "#73239F",
      "#1CA06E", "#221617", "#736058", "#4E4E4E"
    )
  ) +
  xlab("Taxon") +
  ylab("Normalized proportion of genes with HGT evidence ") +
  theme_classic()
Supplementary figures
Sup. Figs 2, 3 and 12 were constructed by hand
Sup. Figure 1: NRPS tree
# Load the NRPS phylogeny.
zygo_tree <- read.tree("SF1/NRPS.tre")

# Group assignments from Bushley 2010.
table.names <- read.table(
  "SF1/Bushley_names.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)

# Pair each tip label with its group (NA when the label is not in the table).
alin.names.org <- data.frame(
  names = zygo_tree$tip.label,
  group = table.names$Group[match(zygo_tree$tip.label, table.names$name.or.i.)],
  stringsAsFactors = FALSE
)

# Step 1: keep the first two "_"-separated fields of every label, joined by a
# space.
names.step1 <- unlist(lapply(
  strsplit(alin.names.org$names, split = "_"),
  function(fields) paste(fields[1], fields[2])
))
# Labels containing "/" are cut back to their first space-separated word.
names.step1[grep(pattern = "/", names.step1)] <- unlist(lapply(
  strsplit(grep(pattern = "/", names.step1, value = TRUE), split = " "),
  function(words) words[1]
))
# Re-attach a trailing number: " 1" -> "_1", " 2" -> "_2", any other trailing
# run of digits -> "_JGI".
names.step1 <- gsub(pattern = " 1$", replacement = "_1", x = names.step1)
names.step1 <- gsub(pattern = " 2$", replacement = "_2", x = names.step1)
names.step1 <- gsub(
  pattern = " \\d+$", replacement = "_JGI", x = names.step1, perl = TRUE
)
names_NRPS <- data.frame(
  tip.label = zygo_tree$tip.label,
  file = names.step1,
  stringsAsFactors = FALSE
)

# Reduce each name to the lookup key: first word, "jgi|" prefix removed, then
# everything before the first "|".
names_NRPS$file <- unlist(lapply(
  strsplit(names_NRPS$file, split = " ", fixed = TRUE),
  function(words) words[1]
))
names_NRPS$file <- gsub(pattern = "jgi\\|", replacement = "", x = names_NRPS$file)
names_NRPS$file <- unlist(lapply(
  strsplit(names_NRPS$file, split = "|", fixed = TRUE),
  function(parts) parts[1]
))
# The same key is used for the merge by `file` and the merge by `genus`.
names_NRPS$genus <- names_NRPS$file

# Taxonomy table keyed by the `file` column.
all.jgi <- read.csv("JGI_code_taxonomy.csv")
head(all.jgi)
## Phylum_name Subphylum_name Class_name Order_name
## 1 Ascomycota Pezizomycotina Dothideomycetes Pleosporales
## 2 Basidiomycota Agaricomycotina Agaricomycetes Polyporales
## 3 Basidiomycota Agaricomycotina Agaricomycetes Polyporales
## 4 Mucoromycota Mucoromycotina Incertae_sedis Mucorales
## 5 Mucoromycota Mucoromycotina Incertae_sedis Mucorales
## 6 Basidiomycota Ustilaginomycotina Exobasidiomycetes Exobasidiales
## Family_name file Species
## 1 Dacampiaceae Aaoar1 Aaosphaeria arxii CBS 175.79 v1.0
## 2 Meruliaceae Abobi1 Abortiporus biennis CCBS 521 v1.0
## 3 Meruliaceae Abobie1 Abortiporus biennis CIRM-BRFM1778 v1.0
## 4 Cunninghamellaceae Chlpad1 Absidia padenii NRRL 2977 v1.0
## 5 Cunninghamellaceae Absrep1 Absidia repens NRRL 1336 v1.0
## 6 Cryptobasidiaceae Acain1 Acaromyces ingoldii MCA 4198 v1.0
# Genus is the first word of the species name. vapply() yields a plain
# character vector; lapply() on its own stored a list column in the data frame.
all.jgi$genus <- vapply(
  strsplit(as.character(all.jgi$Species), split = " ", fixed = TRUE),
  function(words) words[1],
  character(1)
)
# Merging the names
# Tips whose parsed name matches the `file` column of the taxonomy table.
# Columns are picked by name rather than by position (2 and 4).
zygo.tree <- merge(names_NRPS, all.jgi, by = "file", no.dups = TRUE, sort = FALSE)
zygo.tree <- zygo.tree[c("tip.label", "Phylum_name")]
# Tips whose parsed name matches a genus. A genus can have several taxonomy
# rows, so identical (tip, phylum) pairs are collapsed.
fungi.tree <- merge(names_NRPS, all.jgi, by = "genus", no.dups = TRUE, sort = FALSE)
fungi.tree <- fungi.tree[c("tip.label", "Phylum_name")]
fungi.tree <- fungi.tree[!duplicated(fungi.tree), ]
fungi.all <- rbind(fungi.tree, zygo.tree)
# Drop the Basme2finSC tips so they can be relabelled below. The filter must be
# computed on fungi.all itself: the previous code took row positions from
# names_NRPS$tip.label, a table with different rows, which removed unrelated
# annotations and introduced all-NA rows.
fungi.all <- fungi.all[!grepl("Basme2finSC", fungi.all$tip.label), ]
# Tips matching N161 or Basme2finSC are labelled "BasMer"; tips matching N168
# are labelled "BasHet".
fungi.all <- rbind(
  fungi.all,
  data.frame(
    tip.label = grep(names_NRPS$tip.label, pattern = "N161|Basme2finSC", value = TRUE),
    Phylum_name = "BasMer"
  )
)
fungi.all <- rbind(
  fungi.all,
  data.frame(
    tip.label = grep(names_NRPS$tip.label, pattern = "N168", value = TRUE),
    Phylum_name = "BasHet"
  )
)
# Plotting the tree
# Turn the phylogeny into a plotting table.
zygo_fort <- fortify(zygo_tree)
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `mutate_()` is deprecated as of dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Non-tip rows take the tree's node labels converted to numbers; tip rows stay
# NA. Labels that are not numeric become NA (hence the coercion warning).
zygo_fort[["bootstrap"]] <- NA
zygo_fort[["bootstrap"]][!zygo_fort[["isTip"]]] <- as.numeric(zygo_tree$node.label)
## Warning: NAs introduced by coercion
# Hide support values below 70.
is.na(zygo_fort[["bootstrap"]]) <- which(zygo_fort[["bootstrap"]] < 70)
# Attach the per-tip annotation, colour tip labels by Phylum_name and print the
# remaining support values next to their nodes.
(ggtree(zygo_fort, size = 0.3) %<+% fungi.all) +
  geom_tiplab(aes(color = Phylum_name), size = 1) +
  geom_text(aes(label = bootstrap), vjust = -0.5, hjust = -0.5, size = 1) +
  theme(legend.position = "bottom")
## Warning: Removed 1461 rows containing missing values (geom_text).

Figure was finalized by hand in Adobe Illustrator 2020 to color the missing tip labels, improve legibility and legend, and extend root.
Sup. Figure 4: PKS tree
# Load the PKS phylogeny.
zygo_tree <- read.tree("SF4/PKS.tre")

# One row per tip label.
alin.names.org <- data.frame(names = zygo_tree$tip.label, stringsAsFactors = FALSE)

# Step 1: keep the first two "_"-separated fields of every label, joined by a
# space.
names.step1 <- unlist(lapply(
  strsplit(alin.names.org$names, split = "_"),
  function(fields) paste(fields[1], fields[2])
))
# Labels containing "/" are cut back to their first space-separated word.
names.step1[grep(pattern = "/", names.step1)] <- unlist(lapply(
  strsplit(grep(pattern = "/", names.step1, value = TRUE), split = " "),
  function(words) words[1]
))
# Re-attach a trailing number: " 1" -> "_1", " 2" -> "_2", any other trailing
# run of digits -> "_JGI".
names.step1 <- gsub(pattern = " 1$", replacement = "_1", x = names.step1)
names.step1 <- gsub(pattern = " 2$", replacement = "_2", x = names.step1)
names.step1 <- gsub(
  pattern = " \\d+$", replacement = "_JGI", x = names.step1, perl = TRUE
)
# NOTE: the table keeps the name `names_NRPS` although it now holds PKS tips.
names_NRPS <- data.frame(
  tip.label = zygo_tree$tip.label,
  file = names.step1,
  stringsAsFactors = FALSE
)

# Reduce each name to the lookup key: first word, "jgi|" prefix removed, then
# everything before the first "|".
names_NRPS$file <- unlist(lapply(
  strsplit(names_NRPS$file, split = " ", fixed = TRUE),
  function(words) words[1]
))
names_NRPS$file <- gsub(pattern = "jgi\\|", replacement = "", x = names_NRPS$file)
names_NRPS$file <- unlist(lapply(
  strsplit(names_NRPS$file, split = "|", fixed = TRUE),
  function(parts) parts[1]
))
# The same key is used for the merge by `file` and the merge by `genus`.
names_NRPS$genus <- names_NRPS$file
# JGI names
all.jgi <- read.csv("JGI_code_taxonomy.csv")
# Genus is the first word of the species name. vapply() yields a plain
# character vector; lapply() on its own stored a list column in the data frame.
all.jgi$genus <- vapply(
  strsplit(as.character(all.jgi$Species), split = " ", fixed = TRUE),
  function(words) words[1],
  character(1)
)
# Merging the names
# Tips whose parsed name matches the `file` column of the taxonomy table.
# Columns are picked by name rather than by position (2 and 4).
zygo.tree <- merge(names_NRPS, all.jgi, by = "file", no.dups = TRUE, sort = FALSE)
zygo.tree <- zygo.tree[c("tip.label", "Phylum_name")]
# Tips whose parsed name matches a genus. A genus can have several taxonomy
# rows, so identical (tip, phylum) pairs are collapsed, as is done for the
# NRPS tree.
fungi.tree <- merge(names_NRPS, all.jgi, by = "genus", no.dups = TRUE, sort = FALSE)
fungi.tree <- fungi.tree[c("tip.label", "Phylum_name")]
fungi.tree <- fungi.tree[!duplicated(fungi.tree), ]
fungi.all <- rbind(zygo.tree, fungi.tree)
# Drop the Basme2finSC tips so they can be relabelled below. The filter must be
# computed on fungi.all itself: the previous code took row positions from
# names_NRPS$tip.label, a table with different rows, which removed unrelated
# annotations and introduced all-NA rows.
fungi.all <- fungi.all[!grepl("Basme2finSC", fungi.all$tip.label), ]
# Tips matching N161 or Basme2finSC are labelled "BasMer"; tips matching N168
# are labelled "BasHet".
fungi.all <- rbind(
  fungi.all,
  data.frame(
    tip.label = grep(names_NRPS$tip.label, pattern = "N161|Basme2finSC", value = TRUE),
    Phylum_name = "BasMer"
  )
)
fungi.all <- rbind(
  fungi.all,
  data.frame(
    tip.label = grep(names_NRPS$tip.label, pattern = "N168", value = TRUE),
    Phylum_name = "BasHet"
  )
)
# Plotting the tree
# Turn the phylogeny into a plotting table.
zygo_fort <- fortify(zygo_tree)
# Non-tip rows take the tree's node labels converted to numbers; tip rows stay
# NA. Labels that are not numeric become NA (hence the coercion warning).
zygo_fort[["bootstrap"]] <- NA
zygo_fort[["bootstrap"]][!zygo_fort[["isTip"]]] <- as.numeric(zygo_tree$node.label)
## Warning: NAs introduced by coercion
# Hide support values below 70.
is.na(zygo_fort[["bootstrap"]]) <- which(zygo_fort[["bootstrap"]] < 70)
# Attach the per-tip annotation, colour tip labels by Phylum_name and print the
# remaining support values next to their nodes.
(ggtree(zygo_fort, size = 0.3) %<+% fungi.all) +
  geom_tiplab(aes(color = Phylum_name), size = 1) +
  geom_text(aes(label = bootstrap), vjust = -0.5, hjust = -0.5, size = 1) +
  theme(legend.position = "bottom")
## Warning: Removed 650 rows containing missing values (geom_text).

Figure was finalized by hand in Adobe Illustrator 2020 to color the missing tip labels, improve legibility and legend, and extend the root.
Sup. Figure 7: TC tree
# Load the TC phylogeny.
zygo_tree <- read.tree("SF7/TC.tree")

# One row per tip label.
alin.names.org <- data.frame(names = zygo_tree$tip.label, stringsAsFactors = FALSE)

# Step 1: keep the first two "_"-separated fields of every label, joined by a
# space.
names.step1 <- unlist(lapply(
  strsplit(alin.names.org$names, split = "_"),
  function(fields) paste(fields[1], fields[2])
))
# Labels containing "/" are cut back to their first space-separated word.
names.step1[grep(pattern = "/", names.step1)] <- unlist(lapply(
  strsplit(grep(pattern = "/", names.step1, value = TRUE), split = " "),
  function(words) words[1]
))
# Re-attach a trailing number: " 1" -> "_1", " 2" -> "_2", any other trailing
# run of digits -> "_JGI".
names.step1 <- gsub(pattern = " 1$", replacement = "_1", x = names.step1)
names.step1 <- gsub(pattern = " 2$", replacement = "_2", x = names.step1)
names.step1 <- gsub(
  pattern = " \\d+$", replacement = "_JGI", x = names.step1, perl = TRUE
)
names_TC <- data.frame(
  tip.label = zygo_tree$tip.label,
  file = names.step1,
  stringsAsFactors = FALSE
)

# Reduce each name to the lookup key: first word, "jgi|" prefix removed, then
# everything before the first "|".
names_TC$file <- unlist(lapply(
  strsplit(names_TC$file, split = " ", fixed = TRUE),
  function(words) words[1]
))
names_TC$file <- gsub(pattern = "jgi\\|", replacement = "", x = names_TC$file)
names_TC$file <- unlist(lapply(
  strsplit(names_TC$file, split = "|", fixed = TRUE),
  function(parts) parts[1]
))
# The same key is used for the merge by `file` and the merge by `genus`.
names_TC$genus <- names_TC$file
# JGI names
all.jgi <- read.csv("JGI_code_taxonomy.csv")
# Genus is the first word of the species name. vapply() yields a plain
# character vector; lapply() on its own stored a list column in the data frame.
all.jgi$genus <- vapply(
  strsplit(as.character(all.jgi$Species), split = " ", fixed = TRUE),
  function(words) words[1],
  character(1)
)
# Merging the names
# Tips whose parsed name matches the `file` column of the taxonomy table.
# Columns are picked by name rather than by position (2 and 4).
zygo.tree <- merge(names_TC, all.jgi, by = "file", no.dups = TRUE, sort = FALSE)
zygo.tree <- zygo.tree[c("tip.label", "Phylum_name")]
# Tips whose parsed name matches a genus. A genus can have several taxonomy
# rows, so identical (tip, phylum) pairs are collapsed, as is done for the
# NRPS tree.
fungi.tree <- merge(names_TC, all.jgi, by = "genus", no.dups = TRUE, sort = FALSE)
fungi.tree <- fungi.tree[c("tip.label", "Phylum_name")]
fungi.tree <- fungi.tree[!duplicated(fungi.tree), ]
fungi.all <- rbind(zygo.tree, fungi.tree)
# Drop the Basme2finSC tips so they can be relabelled below. The filter must be
# computed on fungi.all itself: the previous code took row positions from
# names_TC$tip.label, a table with different rows, which removed unrelated
# annotations and introduced all-NA rows.
fungi.all <- fungi.all[!grepl("Basme2finSC", fungi.all$tip.label), ]
# Tips matching N161 or Basme2finSC are labelled "BasMer"; tips matching N168
# are labelled "BasHet".
fungi.all <- rbind(
  fungi.all,
  data.frame(
    tip.label = grep(names_TC$tip.label, pattern = "N161|Basme2finSC", value = TRUE),
    Phylum_name = "BasMer"
  )
)
fungi.all <- rbind(
  fungi.all,
  data.frame(
    tip.label = grep(names_TC$tip.label, pattern = "N168", value = TRUE),
    Phylum_name = "BasHet"
  )
)
# Plotting the tree
# Turn the phylogeny into a plotting table.
zygo_fort <- fortify(zygo_tree)
# Non-tip rows take the tree's node labels converted to numbers; tip rows stay
# NA. Labels that are not numeric become NA (hence the coercion warning).
zygo_fort[["bootstrap"]] <- NA
zygo_fort[["bootstrap"]][!zygo_fort[["isTip"]]] <- as.numeric(zygo_tree$node.label)
## Warning: NAs introduced by coercion
# Hide support values below 70.
is.na(zygo_fort[["bootstrap"]]) <- which(zygo_fort[["bootstrap"]] < 70)
# Attach the per-tip annotation, colour tip labels by Phylum_name and print the
# remaining support values next to their nodes.
(ggtree(zygo_fort, size = 0.3) %<+% fungi.all) +
  geom_tiplab(aes(color = Phylum_name), size = 1) +
  geom_text(aes(label = bootstrap), vjust = -0.5, hjust = -0.5, size = 1) +
  theme(legend.position = "bottom")
## Warning: Removed 1609 rows containing missing values (geom_text).

Figure was finalized by hand in Adobe Illustrator 2020 to color the missing tip labels, improve legibility and legend, and extend the root.
Sup. Figure 8: HGT assay
# Coverage panel for one scaffold.
#
# coverage data frame with columns Pos and Cov, drawn as a line.
# genes    data frame with columns Start, End and Origin; each row is drawn as
#          a closed arrow at y = -1, coloured by Origin.
#
# Returns a ggplot object.
coverage_panel <- function(coverage, genes) {
  ggplot(data = coverage, aes(x = Pos, y = Cov)) +
    geom_line() +
    geom_segment(
      data = genes,
      aes(x = Start, xend = End, y = -1, yend = -1, color = Origin),
      arrow = arrow(length = unit(0.1, "inches"), type = "closed"),
      size = 1, lineend = "butt", linejoin = "mitre"
    ) +
    scale_y_continuous(breaks = seq(0, 1500, 100)) +
    theme_classic() +
    theme(legend.position = "bottom")
}

# Z-score panel for one scaffold.
#
# zscores data frame with columns Pos and zscore, drawn as points.
# genes   data frame with columns Start, End and Origin; each row is drawn as
#         a thick segment at y = -1, coloured by Origin.
#
# Horizontal guides are drawn at -1 and 1 (green) and at -2 and 2 (red).
# Returns a ggplot object.
zscore_panel <- function(zscores, genes) {
  ggplot(data = zscores, aes(x = Pos, y = zscore)) +
    geom_point(size = 0.5) +
    geom_hline(yintercept = c(-1, 1), color = "forestgreen") +
    geom_hline(yintercept = c(-2, 2), color = "red") +
    geom_segment(
      data = genes,
      aes(x = Start, xend = End, y = -1, yend = -1, color = Origin),
      size = 3
    ) +
    theme_bw()
}

# Scaffold jcf7180000803233
jcf7180000803233 <- readRDS(file = "SF8/jcf7180000803233_HGTplot.Rds")
chom.sub.all <- readRDS(file = "SF8/jcf7180000803233_Zscores.Rds")
GFF.chrom <- readRDS(file = "SF8/GFF_chrom.jcf7180000803233.Rds")
# Coverage plot
cov.plot <- coverage_panel(jcf7180000803233, GFF.chrom)
# Z-score plot
zscore.plot <- zscore_panel(chom.sub.all, GFF.chrom)
# Plot
ggarrange(cov.plot, zscore.plot, nrow = 2, labels = c("Coverage", "Zscore"))

# Scaffold jcf7180000797043 (chom.sub.all, GFF.chrom, cov.plot and zscore.plot
# are overwritten, as before).
jcf7180000797043 <- readRDS(file = "SF8/jcf7180000797043_HGTplot.Rds")
chom.sub.all <- readRDS(file = "SF8/jcf7180000797043_Zscores.Rds")
GFF.chrom <- readRDS(file = "SF8/GFF_chrom.jcf7180000797043.Rds")
# Coverage plot
cov.plot <- coverage_panel(jcf7180000797043, GFF.chrom)
# Z-score plot
zscore.plot <- zscore_panel(chom.sub.all, GFF.chrom)
# Plot
ggarrange(cov.plot, zscore.plot, nrow = 2, labels = c("Coverage", "Zscore"))

Figure was finalized by hand in Adobe Illustrator 2020
Revision folder:
The revision folder documents the revision process requested by G3. The only change to the data analysis was to determine the number of HGT candidate genes located on scaffolds with no fungal genes. The procedure and its results are in the HGT_Revision.Rmd file.